In [1]:
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
import re
# Read the two benchmark result tables (paths are relative to the notebook's
# working directory)
xgb_df = pd.read_csv(filepath_or_buffer='results/xgb.csv')
ollama_df = pd.read_csv(filepath_or_buffer='results/ollama.csv')
In [2]:
import plotly.io as pio
# Set Plotly's default renderer to "notebook"; the fig.show() calls in the
# cells below do not pass a renderer explicitly, so they rely on this default.
pio.renderers.default = "notebook"
In [3]:
# Show the full xgb_df frame through the notebook's display() hook.
# NOTE(review): nothing here adds sorting or interactivity; whether the
# rendered table is interactive depends on the notebook front end.
display(xgb_df)
| machine | CPU | GPU | python | platform | bench | dataset_rows | gpu | train_median_s | infer_median_s | auc | seed | timestamp | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | PC_AL_2025 | AMD Ryzen 5 9600X | Nvidia RTX 5060 Ti 16GB | 3.13.5 | Windows-11-10.0.26100-SP0 | xgboost | 100000 | True | 0.80 | 0.01 | 0.81023 | 42 | 2025-07-23T21:02:39 |
| 1 | PC_AL_2025 | AMD Ryzen 5 9600X | Nvidia RTX 5060 Ti 16GB | 3.13.5 | Windows-11-10.0.26100-SP0 | xgboost | 100000 | False | 0.59 | 0.01 | 0.81072 | 42 | 2025-07-23T21:02:43 |
| 2 | PC_AL_2025 | AMD Ryzen 5 9600X | Nvidia RTX 5060 Ti 16GB | 3.13.5 | Windows-11-10.0.26100-SP0 | xgboost | 1000000 | True | 1.60 | 0.01 | 0.82277 | 42 | 2025-07-23T21:02:54 |
| 3 | PC_AL_2025 | AMD Ryzen 5 9600X | Nvidia RTX 5060 Ti 16GB | 3.13.5 | Windows-11-10.0.26100-SP0 | xgboost | 1000000 | False | 3.99 | 0.10 | 0.82276 | 42 | 2025-07-23T21:03:11 |
| 4 | PC_AL_2025 | AMD Ryzen 5 9600X | Nvidia RTX 5060 Ti 16GB | 3.13.5 | Windows-11-10.0.26100-SP0 | xgboost | full | True | 9.93 | 0.06 | 0.82486 | 42 | 2025-07-23T21:04:25 |
| 5 | PC_AL_2025 | AMD Ryzen 5 9600X | Nvidia RTX 5060 Ti 16GB | 3.13.5 | Windows-11-10.0.26100-SP0 | xgboost | full | False | 50.37 | 1.04 | 0.82495 | 42 | 2025-07-23T21:07:42 |
| 6 | PC_AL_2015 | Intel i5-4690K | Nvidia GTX 1060 6GB | 3.13.5 | Windows-10-10.0.19045-SP0 | xgboost | 100000 | True | 1.66 | 0.01 | 0.80975 | 42 | 2025-07-23T23:13:28 |
| 7 | PC_AL_2015 | Intel i5-4690K | Nvidia GTX 1060 6GB | 3.13.5 | Windows-10-10.0.19045-SP0 | xgboost | 100000 | False | 1.79 | 0.03 | 0.80892 | 42 | 2025-07-23T23:13:36 |
| 8 | PC_AL_2015 | Intel i5-4690K | Nvidia GTX 1060 6GB | 3.13.5 | Windows-10-10.0.19045-SP0 | xgboost | 1000000 | True | 5.80 | 0.04 | 0.82262 | 42 | 2025-07-23T23:14:04 |
| 9 | PC_AL_2015 | Intel i5-4690K | Nvidia GTX 1060 6GB | 3.13.5 | Windows-10-10.0.19045-SP0 | xgboost | 1000000 | False | 13.69 | 0.32 | 0.82227 | 42 | 2025-07-23T23:14:57 |
| 10 | PC_AL_2015 | Intel i5-4690K | Nvidia GTX 1060 6GB | 3.13.5 | Windows-10-10.0.19045-SP0 | xgboost | full | True | 45.20 | 0.38 | 0.82495 | 42 | 2025-07-23T23:18:54 |
| 11 | PC_AL_2015 | Intel i5-4690K | Nvidia GTX 1060 6GB | 3.13.5 | Windows-10-10.0.19045-SP0 | xgboost | full | False | 147.38 | 3.46 | 0.82489 | 42 | 2025-07-23T23:28:06 |
| 12 | Mark1 | AMD Ryzen 7 9800X3D | Nvidia GTX 4080 SUPER | 3.13.5 | Linux-5.15.167.4-microsoft-standard-WSL2-x86_6... | xgboost | 100000 | True | 0.88 | 0.01 | 0.81023 | 42 | 2025-07-26T12:04:36 |
| 13 | Mark1 | AMD Ryzen 7 9800X3D | Nvidia GTX 4080 SUPER | 3.13.5 | Linux-5.15.167.4-microsoft-standard-WSL2-x86_6... | xgboost | 100000 | False | 1.66 | 0.01 | 0.80963 | 42 | 2025-07-26T12:04:42 |
| 14 | Mark1 | AMD Ryzen 7 9800X3D | Nvidia GTX 4080 SUPER | 3.13.5 | Linux-5.15.167.4-microsoft-standard-WSL2-x86_6... | xgboost | 1000000 | True | 1.15 | 0.01 | 0.82277 | 42 | 2025-07-26T12:04:51 |
| 15 | Mark1 | AMD Ryzen 7 9800X3D | Nvidia GTX 4080 SUPER | 3.13.5 | Linux-5.15.167.4-microsoft-standard-WSL2-x86_6... | xgboost | 1000000 | False | 1.67 | 0.06 | 0.82306 | 42 | 2025-07-26T12:05:03 |
| 16 | Mark1 | AMD Ryzen 7 9800X3D | Nvidia GTX 4080 SUPER | 3.13.5 | Linux-5.15.167.4-microsoft-standard-WSL2-x86_6... | xgboost | full | True | 5.42 | 0.03 | 0.82486 | 42 | 2025-07-26T12:06:10 |
| 17 | Mark1 | AMD Ryzen 7 9800X3D | Nvidia GTX 4080 SUPER | 3.13.5 | Linux-5.15.167.4-microsoft-standard-WSL2-x86_6... | xgboost | full | False | 31.07 | 0.62 | 0.82495 | 42 | 2025-07-26T12:08:42 |
| 18 | PC_AL_2025_6000 | AMD Ryzen 5 9600X mem-opt | Nvidia RTX 5060 Ti 16GB | 3.13.5 | Windows-11-10.0.26100-SP0 | xgboost | 100000 | True | 0.77 | 0.01 | 0.81023 | 42 | 2025-07-27T14:22:19 |
| 19 | PC_AL_2025_6000 | AMD Ryzen 5 9600X mem-opt | Nvidia RTX 5060 Ti 16GB | 3.13.5 | Windows-11-10.0.26100-SP0 | xgboost | 100000 | False | 0.55 | 0.01 | 0.81072 | 42 | 2025-07-27T14:22:23 |
| 20 | PC_AL_2025_6000 | AMD Ryzen 5 9600X mem-opt | Nvidia RTX 5060 Ti 16GB | 3.13.5 | Windows-11-10.0.26100-SP0 | xgboost | 1000000 | True | 1.47 | 0.01 | 0.82277 | 42 | 2025-07-27T14:22:36 |
| 21 | PC_AL_2025_6000 | AMD Ryzen 5 9600X mem-opt | Nvidia RTX 5060 Ti 16GB | 3.13.5 | Windows-11-10.0.26100-SP0 | xgboost | 1000000 | False | 3.87 | 0.10 | 0.82276 | 42 | 2025-07-27T14:23:00 |
| 22 | PC_AL_2025_6000 | AMD Ryzen 5 9600X mem-opt | Nvidia RTX 5060 Ti 16GB | 3.13.5 | Windows-11-10.0.26100-SP0 | xgboost | full | True | 9.83 | 0.05 | 0.82486 | 42 | 2025-07-27T14:24:33 |
| 23 | PC_AL_2025_6000 | AMD Ryzen 5 9600X mem-opt | Nvidia RTX 5060 Ti 16GB | 3.13.5 | Windows-11-10.0.26100-SP0 | xgboost | full | False | 47.16 | 1.03 | 0.82495 | 42 | 2025-07-27T14:29:17 |
In [4]:
# Machine label shown in menus and hover text: "<CPU> + <GPU>"
xgb_df["machine_label"] = xgb_df["CPU"].add(" + ").add(xgb_df["GPU"])
# Acceleration label: the `gpu` column's True/False values (compared as text)
# become "GPU"/"CPU"; any other value is left as its string form.
acc_names = {"True": "GPU", "False": "CPU"}
xgb_df["acc_label"] = xgb_df["gpu"].astype(str).replace(acc_names)
# ------------------ Build dynamic and formatted dataset_rows labels -------------
def format_thousands(s: str) -> str:
    """Format an integer given as text using '.' as the thousands separator.

    Args:
        s: A value that ``int()`` can parse, e.g. ``"1000000"``.

    Returns:
        The number grouped in thousands with dots, e.g. ``"1.000.000"``.

    Raises:
        ValueError: If ``s`` cannot be parsed by ``int()``.
    """
    grouped = format(int(s), ",")  # comma-grouped first, e.g. "1,000,000"
    return grouped.replace(",", ".")
# Distinct dataset sizes as text (the column mixes row counts with "full")
rows_as_text = xgb_df["dataset_rows"].astype(str)
unique_rows = rows_as_text.unique()
# Keep only the purely numeric sizes, in ascending order
numeric_vals = [int(v) for v in unique_rows if re.fullmatch(r"\d+", v)]
numeric_vals.sort()
# Raw text -> dotted label (e.g. "100000" -> "100.000")
mapping_label = {str(v): format_thousands(str(v)) for v in numeric_vals}
# Axis order: ascending numeric sizes, then "full" last (only if it occurs)
labels_formatted = [mapping_label[str(v)] for v in numeric_vals]
if "full" in unique_rows:
    labels_formatted.append("full")
mapping_label["full"] = "full"
# Ordered categorical carrying that order; values not covered by the mapping
# end up as NaN.
xgb_df["rows_label"] = pd.Categorical(
    rows_as_text.map(mapping_label),
    categories=labels_formatted,
    ordered=True,
)
# ------------------ Machine-related dynamic parts ------------------
# Distinct machine labels in sorted order
machine_labels = sorted(xgb_df["machine_label"].unique())
# Fill-pattern shapes, assigned to machines by position; the list is reused
# from the start when there are more machines than shapes.
patterns_cycle = ["", "/", "x", "\\", "-", "|", "+", "."]
pattern_map = dict(
    zip(
        machine_labels,
        (patterns_cycle[pos % len(patterns_cycle)] for pos in range(len(machine_labels))),
    )
)
# Bar colour per acceleration: zip() stops after the two names, so GPU gets
# Pastel[0] and CPU gets Pastel[1].
color_map = dict(zip(("GPU", "CPU"), px.colors.qualitative.Pastel))
# ------------------ Build traces ------------------
# One horizontal bar trace per (machine, acceleration) pair that has rows.
# `meta` is appended in lockstep with `traces`, so meta[i] describes traces[i].
traces = []
meta = []
for machine in machine_labels:
    is_machine = xgb_df["machine_label"] == machine
    for acc in ["GPU", "CPU"]:
        subset = xgb_df[is_machine & (xgb_df["acc_label"] == acc)]
        if subset.empty:
            # No benchmark rows for this combination: emit no trace
            continue
        train_times = subset["train_median_s"]
        bar = go.Bar(
            y=subset["rows_label"],
            x=train_times,
            orientation="h",
            showlegend=False,
            # Colour encodes the acceleration, fill pattern encodes the machine
            marker=dict(color=color_map[acc], pattern=dict(shape=pattern_map[machine])),
            text=train_times.map("{:.2f}".format).tolist(),
            textposition="outside",
            hovertemplate=(
                f"{machine}<br>%{{y}} righe · {acc}<br>%{{x:.2f}} s"
                "<extra></extra>"
            ),
        )
        traces.append(bar)
        meta.append({"machine": machine, "acc": acc})
fig = go.Figure(data=traces)
# ------------------ Visibility masks ------------------
# Each mask holds one boolean per trace, in trace order.
n = len(traces)
vis_all = [True] * n
trace_accs = [entry["acc"] for entry in meta]
vis_gpu_only = [trace_acc == "GPU" for trace_acc in trace_accs]
vis_cpu_only = [trace_acc == "CPU" for trace_acc in trace_accs]
# Per-machine mask: True only for the traces built from that machine
machine_vis_dict = {
    label: [entry["machine"] == label for entry in meta]
    for label in machine_labels
}
# ------------------ Dropdowns ------------------
# NOTE(review): every button passes a "visible" mask covering all traces, so
# the two menus do not combine; the most recently clicked button decides what
# is shown (and which title is displayed).
# Placement/styling shared by both menus; only `y` and the buttons differ.
menu_common = dict(
    direction="down",
    x=1.02,
    xanchor="left",
    yanchor="top",
    showactive=True,
    bgcolor="white",
    bordercolor="lightgray",
)
# (button label, visibility mask, figure title) for the acceleration menu
acc_choices = [
    ("Tutte le accelerazioni", vis_all, "Tempo di training – tutte le accelerazioni"),
    ("Solo GPU", vis_gpu_only, "Tempo di training – solo GPU"),
    ("Solo CPU", vis_cpu_only, "Tempo di training – solo CPU"),
]
dropdown_acc = dict(
    buttons=[
        dict(label=label, method="update",
             args=[{"visible": mask}, {"title": title}])
        for label, mask, title in acc_choices
    ],
    y=1,
    **menu_common,
)
# Machine menu: an "all machines" entry followed by one entry per machine
machine_buttons = [
    dict(label="Tutte le macchine", method="update",
         args=[{"visible": vis_all},
               {"title": "Tempo di training – tutte le macchine"}]),
    *(
        dict(label=machine, method="update",
             args=[{"visible": machine_vis_dict[machine]},
                   {"title": f"Tempo di training – {machine}"}])
        for machine in machine_labels
    ),
]
dropdown_machine = dict(buttons=machine_buttons, y=0.8, **menu_common)
# ------------------ Layout updates ------------------
fig.update_layout(
    barmode="group",
    title="Tempo di training – tutte le accelerazioni e macchine",
    xaxis=dict(title="Train time [s]"),
    yaxis=dict(
        title="Numero di righe del dataset",
        # Force the category axis to follow the prepared label order
        categoryorder="array",
        categoryarray=labels_formatted,
    ),
    updatemenus=[dropdown_acc, dropdown_machine],
    height=40 * len(xgb_df),  # figure height scales with the number of rows in xgb_df
    margin=dict(r=200),  # space on the right for the dropdown menus
)
fig.show()
In [5]:
# Show the ollama_df frame through the notebook's display() hook
# (no tabulate call is involved; large frames are shown truncated, as in the
# "64 rows × 16 columns" output below).
display(ollama_df)
| machine | CPU | GPU | python | platform | bench | model | gpu | wall_min_s | wall_med_s | wall_max_s | tok_min_s | tok_med_s | tok_max_s | seed | timestamp | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | PC_AL_2025 | AMD Ryzen 5 9600X | Nvidia RTX 5060 Ti 16GB | 3.13.5 | Windows-11-10.0.26100-SP0 | ollama | phi3:3.8b | True | 6.37 | 6.45 | 14.81 | 133.99 | 136.25 | 136.62 | 42 | 2025-07-23T21:08:10 |
| 1 | PC_AL_2025 | AMD Ryzen 5 9600X | Nvidia RTX 5060 Ti 16GB | 3.13.5 | Windows-11-10.0.26100-SP0 | ollama | phi3:3.8b | False | 31.59 | 34.68 | 34.70 | 21.43 | 21.44 | 21.55 | 42 | 2025-07-23T21:09:52 |
| 2 | PC_AL_2025 | AMD Ryzen 5 9600X | Nvidia RTX 5060 Ti 16GB | 3.13.5 | Windows-11-10.0.26100-SP0 | ollama | qwen3:4b | True | 13.93 | 14.19 | 19.01 | 105.24 | 106.41 | 107.55 | 42 | 2025-07-23T21:10:39 |
| 3 | PC_AL_2025 | AMD Ryzen 5 9600X | Nvidia RTX 5060 Ti 16GB | 3.13.5 | Windows-11-10.0.26100-SP0 | ollama | qwen3:4b | False | 92.86 | 92.92 | 142.73 | 12.89 | 13.18 | 13.19 | 42 | 2025-07-23T21:16:08 |
| 4 | PC_AL_2025 | AMD Ryzen 5 9600X | Nvidia RTX 5060 Ti 16GB | 3.13.5 | Windows-11-10.0.26100-SP0 | ollama | qwen3:14b | True | 26.28 | 26.34 | 29.25 | 40.56 | 40.74 | 40.75 | 42 | 2025-07-23T21:17:30 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 59 | PC_AL_2025_6000 | AMD Ryzen 5 9600X mem-opt | Nvidia RTX 5060 Ti 16GB | 3.13.5 | Windows-11-10.0.26100-SP0 | ollama | gemma3n:e2b | False | 10.59 | 10.62 | 12.10 | 31.21 | 31.34 | 31.40 | 42 | 2025-07-27T15:09:52 |
| 60 | PC_AL_2025_6000 | AMD Ryzen 5 9600X mem-opt | Nvidia RTX 5060 Ti 16GB | 3.13.5 | Windows-11-10.0.26100-SP0 | ollama | deepseek-r1:8b | True | 19.55 | 20.60 | 20.68 | 66.85 | 67.15 | 67.28 | 42 | 2025-07-27T15:11:34 |
| 61 | PC_AL_2025_6000 | AMD Ryzen 5 9600X mem-opt | Nvidia RTX 5060 Ti 16GB | 3.13.5 | Windows-11-10.0.26100-SP0 | ollama | deepseek-r1:8b | False | 98.82 | 98.94 | 141.58 | 9.58 | 9.65 | 9.66 | 42 | 2025-07-27T15:20:33 |
| 62 | PC_AL_2025_6000 | AMD Ryzen 5 9600X mem-opt | Nvidia RTX 5060 Ti 16GB | 3.13.5 | Windows-11-10.0.26100-SP0 | ollama | deepseek-r1:14b | True | 15.84 | 15.86 | 23.47 | 39.88 | 40.14 | 40.14 | 42 | 2025-07-27T15:22:00 |
| 63 | PC_AL_2025_6000 | AMD Ryzen 5 9600X mem-opt | Nvidia RTX 5060 Ti 16GB | 3.13.5 | Windows-11-10.0.26100-SP0 | ollama | deepseek-r1:14b | False | 82.42 | 82.44 | 111.48 | 6.78 | 6.80 | 6.81 | 42 | 2025-07-27T15:29:22 |
64 rows × 16 columns
In [6]:
# Acceleration label: the `gpu` column's True/False values (compared as text)
# become "GPU"/"CPU"; any other value is left as its string form.
acc_names = {"True": "GPU", "False": "CPU"}
ollama_df["acc_label"] = ollama_df["gpu"].astype(str).replace(acc_names)
# Machine label shown in menus and hover text: "<CPU> + <GPU>"
ollama_df["machine_label"] = ollama_df["CPU"].add(" + ").add(ollama_df["GPU"])
# Distinct models in order of first appearance (not alphabetical)
model_order = [*dict.fromkeys(ollama_df["model"].tolist())]
model_dtype = pd.CategoricalDtype(categories=model_order, ordered=True)
ollama_df["model"] = ollama_df["model"].astype(model_dtype)
# Distinct machine labels in sorted order (first-appearance order would be
# list(dict.fromkeys(...)) over the same column instead)
machine_labels = sorted(ollama_df["machine_label"].unique())
# Fill-pattern shapes, assigned to machines by position; the list is reused
# from the start when there are more machines than shapes.
patterns_cycle = ["", "/", "x", "\\", "-", "|", "+", "."]
pattern_map = dict(
    zip(
        machine_labels,
        (patterns_cycle[pos % len(patterns_cycle)] for pos in range(len(machine_labels))),
    )
)
# Bar colour per acceleration: zip() stops after the two names, so GPU gets
# Pastel[0] and CPU gets Pastel[1].
color_map = dict(zip(("GPU", "CPU"), px.colors.qualitative.Pastel))
# ------------------ Build traces ------------------
# One horizontal bar trace per (machine, acceleration) pair that has rows.
# `meta` is appended in lockstep with `traces`, so meta[i] describes traces[i].
traces = []
meta = []
for machine in machine_labels:
    is_machine = ollama_df["machine_label"] == machine
    for acc in ["GPU", "CPU"]:  # GPU trace first, then CPU
        subset = ollama_df[is_machine & (ollama_df["acc_label"] == acc)]
        if subset.empty:
            # No benchmark rows for this combination: emit no trace
            continue
        # NOTE(review): `tok_med_s` is presumably the median tokens/s (it sits
        # between tok_min_s and tok_max_s), not an average. Confirm against
        # the benchmark script.
        tokens_per_s = subset["tok_med_s"]
        bar = go.Bar(
            y=subset["model"],
            x=tokens_per_s,
            orientation="h",
            showlegend=False,
            # Colour encodes the acceleration, fill pattern encodes the machine
            marker=dict(color=color_map[acc], pattern=dict(shape=pattern_map[machine])),
            text=tokens_per_s.map("{:.2f}".format).tolist(),
            textposition="outside",
            hovertemplate=(
                f"{machine}<br>Modello: %{{y}} · {acc}<br>%{{x:.2f}} token/s"
                "<extra></extra>"
            ),
        )
        traces.append(bar)
        meta.append({"machine": machine, "acc": acc})
fig = go.Figure(data=traces)
# ------------------ Visibility masks ------------------
# Each mask holds one boolean per trace, in trace order.
n = len(traces)
vis_all = [True] * n
trace_accs = [entry["acc"] for entry in meta]
vis_gpu_only = [trace_acc == "GPU" for trace_acc in trace_accs]
vis_cpu_only = [trace_acc == "CPU" for trace_acc in trace_accs]
# Per-machine mask: True only for the traces built from that machine
machine_vis_dict = {
    label: [entry["machine"] == label for entry in meta]
    for label in machine_labels
}
# ------------------ Dropdowns ------------------
# NOTE(review): every button passes a "visible" mask covering all traces, so
# the two menus do not combine; the most recently clicked button decides what
# is shown (and which title is displayed).
# Placement/styling shared by both menus; only `y` and the buttons differ.
menu_common = dict(
    direction="down",
    x=1.02,
    xanchor="left",
    yanchor="top",
    showactive=True,
    bgcolor="white",
    bordercolor="lightgray",
)
# (button label, visibility mask, figure title) for the acceleration menu
acc_choices = [
    ("Tutte le accelerazioni", vis_all, "Token al secondo – tutte le accelerazioni"),
    ("Solo GPU", vis_gpu_only, "Token al secondo – solo GPU"),
    ("Solo CPU", vis_cpu_only, "Token al secondo – solo CPU"),
]
dropdown_acc = dict(
    buttons=[
        dict(label=label, method="update",
             args=[{"visible": mask}, {"title": title}])
        for label, mask, title in acc_choices
    ],
    y=1,
    **menu_common,
)
# Machine menu: an "all machines" entry followed by one entry per machine
machine_buttons = [
    dict(label="Tutte le macchine", method="update",
         args=[{"visible": vis_all},
               {"title": "Token al secondo – tutte le macchine"}]),
    *(
        dict(label=machine, method="update",
             args=[{"visible": machine_vis_dict[machine]},
                   {"title": f"Token al secondo – {machine}"}])
        for machine in machine_labels
    ),
]
dropdown_machine = dict(buttons=machine_buttons, y=0.8, **menu_common)
# ------------------ Layout ------------------
fig.update_layout(
    barmode="group",
    title="Token al secondo – tutte le accelerazioni e macchine",
    xaxis_title="Token/s",
    yaxis_title="Modello LLM",
    updatemenus=[dropdown_acc, dropdown_machine],
    height=len(ollama_df) * 40,  # figure height scales with the number of rows in ollama_df
    margin=dict(r=220),  # space on the right for the dropdown menus
)
# Pin the category axis to the first-appearance model order, mirroring what the
# XGBoost chart does with its row labels. Without this the axis order is taken
# from the order in which categories show up across traces, so it would change
# if the first machine lacked a model or listed models in a different order.
fig.update_yaxes(categoryorder="array", categoryarray=model_order)
fig.show()